package sparksql_fenxi.T3

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.col

object t1 {
  /**
   * Batch job over a restaurant CSV stored on HDFS:
   *   Task 1 - drop two trailing junk columns and all rows containing nulls.
   *   Task 2 - keep rows whose taste score (`口味`) is greater than 7.
   *   Task 3 - count rows per category (`类别`), sorted descending.
   * Results of tasks 2 and 3 are written back to HDFS as CSV directories.
   */
  def main(args: Array[String]): Unit = {
    // Local Spark session using all available cores; Hive support is enabled
    // although only an in-memory temp view is used below.
    val spark = SparkSession.builder()
      .appName("test")
      .master("local[*]")
      .enableHiveSupport()
      .getOrCreate()

    // No schema inference: every column is read as a string. The numeric
    // comparison in Task 2 relies on Spark's implicit cast.
    val data = spark.read.format("csv")
      .option("header", "true")
      .option("delimiter", ",")
      .load("hdfs://192.168.40.110:9000/tmp/restaurant.csv")

    // Task 1: one multi-column drop instead of two chained drop calls,
    // then remove any row that contains a null in any remaining column.
    val r1 = data
      .drop("_c10", "_c11")
      .na.drop()

    println("第一题结果")
    r1.show()

    // Task 2: filter rows with taste score strictly greater than 7.
    val r2 = r1.filter(col("口味") > 7)
    r2.show()

    // Task 3: review counts per category, descending.
    // A plain GROUP BY replaces the original DISTINCT + count(*) window
    // formulation — same result set, simpler plan (one shuffle, no window).
    r2.createTempView("temp")
    val r3 = spark.sql(
      """
        |select
        |`类别`,
        |count(*) as number
        |from temp
        |group by `类别`
        |order by number desc
        |""".stripMargin)
    r3.show()

    // "overwrite" replaces the target directories if they already exist.
    // NOTE(review): the .csv paths are directories of part files, not
    // single CSV files — confirm downstream consumers expect that.
    r2.write.format("csv")
      .mode("overwrite")
      .save("hdfs://192.168.40.110:9000/tmp/r2.csv")

    r3.write.format("csv")
      .mode("overwrite")
      .save("hdfs://192.168.40.110:9000/tmp/r3.csv")

    spark.close()
  }

}
